* Close a log left open by an earlier run; the error is ignored when none is open.
capture log close

* Root directories used to build every data and output path below.
global path  "****"
global path2 "****"

* Name stem and date stamp that appear in the log and .ster file names.
local name "sum_stats"
local date "081120"
global logname "$path/`name'_`date'.log"

* Session settings, working directory and log file.
clear
set more off
set matsize 1000
cd "$path"
log using "$logname", replace

* Variable groups passed to the summary-statistics programs, in table order.
local individual female age_2017 age0_18 age19_26 age27_44 age45_64 age65plus
local household  married income fpl_frac138 tot_exmpt self_prepared
local local      hsorhigher baorhigher expansion SBM
local penalty    exempt2014 penalty_2014d penalty_2014 exempt2015 penalty_2015d penalty_2015 penalty
local cov2015    any_covered2015 covered2015 full_year_2015
local cov2016    any_covered2016_first11 covered2016_first11 full_2016_first11

* Table 1

* -------------------------------------
* 1) Experimental Sample
* -------------------------------------

* Attach prep_tin_flag to the person-level file (key: tin dep_tin) and save a
* copy; the covariate-balance and restricted-sample sections reload that copy.
use "$path/Data/sum_stats_data.dta",clear
merge 1:1 tin dep_tin using  "$path/Data/penalty_sample_prepcode",keepusing(prep_tin_flag) nogen

save "$path/Data/sum_stats_data_prepcode.dta",replace

* Age-band indicators. All of them evaluate to 0 when age_2017 is missing.
* age27_49 and age50_64 are not in the table variable lists.
gen age0_18 	= (age_2017 >= 0  & age_2017 <= 18)
gen age19_26	= (age_2017 >= 19 & age_2017 <= 26)
gen age27_49 	= (age_2017 >= 27 & age_2017 <= 49)
gen age50_64 	= (age_2017 >= 50 & age_2017 <= 64)
gen age65plus 	= (age_2017 >= 65 & age_2017 != .)

* Income bands relative to fpl_frac.
gen fpl_frac138 	= fpl_frac < 1.38
gen fpl_frac138_400 = fpl_frac >= 1.38 & fpl_frac < 4.00
* Stata treats missing as larger than any number, so without the guard a
* missing fpl_frac would be coded 1 here. With it, missing gives 0 in all three
* bands, the same convention age65plus uses above.
gen fpl_frac400		= fpl_frac >= 4.00 & fpl_frac != .

* 1 when prep_tin_flag is the empty string.
* NOTE(review): rows left unmatched by the merge also have an empty flag - confirm intended.
gen self_prepared = (prep_tin_flag == "")


* Penalty dummy variables: penalty_2014d, penalty_2015d, penalty_2016d.
* Within each tin, copy the first value in sort order (missing sorts last) to
* every other row of that tin.
sort tin penalty_2014d, stable
bysort tin: replace penalty_2014d = penalty_2014d[1] if _n != 1 
* Set to 1 for every row of this sample.
gen penalty_2015d = 1 
sort tin penalty_2016d, stable 
bysort tin: replace penalty_2016d = penalty_2016d[1] if _n != 1 

* Counters: one per row, and one per tin (non-missing on a single row per tin).
gen ind_count = 1 
bysort tin: gen hh_count = 1 if _n == 1

* Income: magi_2015 of the row with primary == 1, copied to all rows of the tin.
gen income = magi_2015 if primary == 1 
sort tin income, stable
bysort tin: replace income = income[1] if _n != 1 


* capture: leaves an existing variable of the same name untouched.
cap gen full_year_2015 = (covered2015 == 12)

* Rebuild the 2016 coverage measures from scratch.
cap drop covered2016_first11
cap drop full_2016_first11
cap drop any_covered2016_first11

* covered2016 net of any_dec; the two indicators stay missing when it is missing.
gen covered2016_first11 = covered2016 - any_dec
gen full_2016_first11 = (covered2016_first11 == 11) if covered2016_first11 != .
gen any_covered2016_first11 = (covered2016_first11 > 0) if covered2016_first11 != .

* Age bands that appear in the table.
cap gen age27_44 = (age_2017 >= 27 & age_2017 <= 44)
cap gen age45_64 = (age_2017 >= 45 & age_2017 <= 64)

cap program drop sample_stats
program sample_stats, eclass
	* sample_stats varlist
	*
	* For every variable in varlist computes
	*   - the mean over all rows,
	*   - the mean where treatment == 0 and where treatment == 1,
	*   - the p-value of the treatment coefficient from a regression of the
	*     variable on treatment with standard errors clustered on tin.
	* The number of non-missing ind_count and hh_count rows is appended to each
	* row of means.
	*
	* Returns: e(mu), e(mu0), e(mu1)  1 x (k + 2), columns = varlist ind_count hh_count
	*          e(d_p)                 1 x k,       columns = varlist
	* Requires treatment, tin, ind_count and hh_count in memory.
	* No ereturn clear/post is issued: the matrices are added to the e()
	* results left by the last regression, which is what estimates save stores.
	syntax varlist
	* rtab holds r(table) in a temporary matrix, so no global matrix is left
	* behind and no user matrix is overwritten.
	tempname mu mu0 mu1 d_p rtab
	* Obtain averages:
	foreach var of local varlist {
		* All penalty group
		qui sum `var'
		mat `mu' = nullmat(`mu'), r(mean)
		* Control in penalty group
		qui sum `var' if treatment == 0
		mat `mu0' = nullmat(`mu0'), r(mean)
		* Treatment in penalty group
		qui sum `var' if treatment == 1
		mat `mu1' = nullmat(`mu1'), r(mean)
		* Obtain p-value based on clustered SE's (row 4 of r(table), first column = treatment)
		qui reg `var' treatment, cluster(tin)
		mat `rtab' = r(table)
		mat `d_p' = nullmat(`d_p'), `rtab'[4,1]
	}
	* Obtain counts:
	foreach var in ind_count hh_count {
		qui sum `var'
		mat `mu' = nullmat(`mu'), r(N)
		qui sum `var' if treatment == 0
		mat `mu0' = nullmat(`mu0'), r(N)
		qui sum `var' if treatment == 1
		mat `mu1' = nullmat(`mu1'), r(N)
	}
	foreach mat in mu mu0 mu1 {
		mat coln ``mat'' = `varlist' ind_count hh_count //adds variable names
		eret mat `mat' = ``mat'' //returned in e() structure
	}
	mat coln `d_p' = `varlist' //adds variable names
	eret mat d_p = `d_p' //returned in e() structure
end



* Full experimental sample: drop stored estimates, then fill e() with the
* means, group means and p-values for every table variable.
estimates clear
sample_stats `individual' `household' `local' ///
	`penalty' `cov2015' `cov2016'


	
* ----------------------------
* 2) 1% sample summary stats
* ----------------------------
use "$path2/Data/Compare_group/sum_for_onep_prepcode_v2.dta", clear

* Age bands used in the table; capture leaves an existing variable untouched.
capture generate age27_44 = inrange(age_2017, 27, 44)
capture generate age45_64 = inrange(age_2017, 45, 64)

* 1 when prep_tin_flag is the empty string.
generate self_prepared = (prep_tin_flag == "")

* Bring the coverage names in line with the table variable lists.
rename anycov2015 any_covered2015
rename anycov2016 any_covered2016

capture generate full_year_2015 = (covered2015 == 12)

* Rebuild the 2016 coverage measures from scratch.
foreach v in covered2016_first11 full_2016_first11 any_covered2016_first11 {
	capture drop `v'
}

* covered2016 net of any_dec; both indicators stay missing when it is missing.
generate covered2016_first11 = covered2016 - any_dec
generate full_2016_first11 = (covered2016_first11 == 11) if !missing(covered2016_first11)
generate any_covered2016_first11 = (covered2016_first11 > 0) if !missing(covered2016_first11)

* Counters: one per row, and one per tin (non-missing on a single row per tin).
generate ind_count = 1
by tin, sort: generate hh_count = 1 if _n == 1
		
capture program drop dset1_stats
program define dset1_stats, eclass
	* dset1_stats varlist
	*
	* Builds one row holding the mean of every variable in varlist, followed by
	* the number of non-missing ind_count and hh_count rows, and stores it as
	* e(dset1) with columns named varlist ind_count hh_count.
	* The matrix is added to the current e() contents without clearing them.
	syntax varlist
	* Sample marker kept from the original program; the local `by' is never
	* defined here and the marker is not used by the summaries below.
	marksample insample
	markout `insample' `by'
	tempname row
	* Obtain averages:
	foreach v of local varlist {
		quietly summarize `v'
		matrix `row' = nullmat(`row'), r(mean)
	}
	* Obtain counts (summarize output is shown, so it appears in the log):
	foreach counter in ind_count hh_count {
		summarize `counter'
		matrix `row' = nullmat(`row'), r(N)
	}

	* Label the columns and hand the row back in e()
	matrix colnames `row' = `varlist' ind_count hh_count
	ereturn matrix dset1 = `row'

end

* Means and counts for the 1% sample, stored as e(dset1).
dset1_stats `individual' `household' `local' ///
	`penalty' `cov2015' `cov2016'
	
		
* -------------------------------------
* 3) LESS THAN FULL COV SUMMARY STATS
* -------------------------------------
use "$path2/Data/Compare_group/sum_for_notfull_prepcode_v2.dta", clear 

* Age bands used in the table; capture leaves an existing variable untouched.
cap gen age27_44 = (age_2017 >= 27 & age_2017 <= 44)
cap gen age45_64 = (age_2017 >= 45 & age_2017 <= 64)
* 1 when prep_tin_flag is the empty string.
gen self_prepared = (prep_tin_flag == "")


* Bring the coverage names in line with the table variable lists.
rename anycov2015 any_covered2015
rename anycov2016 any_covered2016

* full_year_2015 is passed to dset2_stats through the cov2015 list. Every other
* section creates it with cap gen; do the same here so this section does not
* depend on the variable already being in the file (no-op when it exists).
cap gen full_year_2015 = (covered2015 == 12)

* Rebuild the 2016 coverage measures from scratch.
cap drop covered2016_first11
cap drop full_2016_first11
cap drop any_covered2016_first11

* covered2016 net of any_dec; both indicators stay missing when it is missing.
gen covered2016_first11 = covered2016 - any_dec
gen full_2016_first11 = (covered2016_first11 == 11) if covered2016_first11 != .
gen any_covered2016_first11 = (covered2016_first11 > 0) if covered2016_first11 != .



* Counters: one per row, and one per tin (non-missing on a single row per tin).
gen ind_count = 1 
bysort tin: gen hh_count = 1 if _n == 1

capture program drop dset2_stats
program define dset2_stats, eclass
	* dset2_stats varlist
	*
	* Builds one row holding the mean of every variable in varlist, followed by
	* the number of non-missing ind_count and hh_count rows, and stores it as
	* e(dset2) with columns named varlist ind_count hh_count.
	* The matrix is added to the current e() contents without clearing them.
	syntax varlist
	* Sample marker kept from the original program; the local `by' is never
	* defined here and the marker is not used by the summaries below.
	marksample insample
	markout `insample' `by'
	tempname row
	* Obtain averages:
	foreach v of local varlist {
		quietly summarize `v'
		matrix `row' = nullmat(`row'), r(mean)
	}
	* Obtain counts (summarize output is shown, so it appears in the log):
	foreach counter in ind_count hh_count {
		summarize `counter'
		matrix `row' = nullmat(`row'), r(N)
	}

	* Label the columns and hand the row back in e()
	matrix colnames `row' = `varlist' ind_count hh_count
	ereturn matrix dset2 = `row'

end

* Means and counts for the less-than-full-coverage sample, stored as e(dset2).
dset2_stats `individual' `household' `local' ///
	`penalty' `cov2015' `cov2016'

* Write the current e() results to disk.
* NOTE(review): presumably e() still holds the matrices from sample_stats and
* dset1_stats at this point, so all three samples land in one file - confirm.
estimates save "$path/`name'_main_`date'.ster", replace


* ----------------------------
* Covariate Balance
* ----------------------------
* Regress treatment on the table covariates (SEs clustered on tin) and save
* the estimates.
use "$path/Data/sum_stats_data_prepcode.dta",clear 

* Income: magi_2015 of the row with primary == 1, copied to all rows of the tin
* (missing sorts last, so income[1] is the non-missing value when there is one).
gen income = magi_2015 if primary == 1 
sort tin income, stable
bysort tin: replace income = income[1] if _n != 1 
cap gen full_year_2015 = (covered2015 == 12)


* Rebuild the 2016 coverage measures from scratch.
cap drop covered2016_first11
cap drop full_2016_first11
cap drop any_covered2016_first11

gen covered2016_first11 = covered2016 - any_dec
* Leave both indicators missing when covered2016_first11 is missing, as the
* summary-statistics sections do. Stata treats missing as larger than any
* number, so without the guard a missing value would give any_covered = 1.
* Rows with missing covered2016_first11 are already dropped from the
* regression below because that variable is itself a regressor.
gen full_2016_first11 = (covered2016_first11 == 11) if covered2016_first11 != .
gen any_covered2016_first11 = (covered2016_first11 > 0) if covered2016_first11 != .

cap gen age27_44 = (age_2017 >= 27 & age_2017 <= 44)
cap gen age45_64 = (age_2017 >= 45 & age_2017 <= 64)
* 1 when prep_tin_flag is the empty string.
gen self_prepared = (prep_tin_flag == "")


gen fpl_frac138 	= fpl_frac < 1.38
gen fpl_frac138_400 = fpl_frac >= 1.38 & fpl_frac < 4.00
* Guard against missing fpl_frac, which would otherwise compare as >= 4.00.
gen fpl_frac400		= fpl_frac >= 4.00 & fpl_frac != .


* Variables in the table.
* This overrides the lists defined at the top of the file: age enters only as
* age_2017, and penalty_2015d (not created in this section) is left out.
local individual female age_2017 
local household married income fpl_frac138 tot_exmpt self_prepared
local local hsorhigher baorhigher expansion SBM 
local penalty exempt2014 penalty_2014d penalty_2014 exempt2015 penalty_2015 penalty 
local cov2015 any_covered2015 covered2015 full_year_2015
local cov2016 any_covered2016_first11 covered2016_first11 full_2016_first11

* Treat a missing penalty amount as zero so those rows stay in the regression.
replace penalty_2014 = 0 if penalty_2014 == .
replace penalty_2015 = 0 if penalty_2015 == .

estimates clear				
eststo clear
regress treatment `individual' `household' `local' `penalty' `cov2015' `cov2016', cl(tin) 
eststo 
estimates save "$path/covariate_balance_`date'.ster", replace


* ----------------------------
* RESTRICTED SAMPLE SUMMARY STATS
* ----------------------------	

* Variables to go in the table (restores the full lists after the
* covariate-balance section narrowed them).
local individual female age_2017 age0_18 age19_26 age27_44 age45_64 age65plus
local household married income fpl_frac138 tot_exmpt self_prepared
local local hsorhigher baorhigher expansion SBM 
local penalty exempt2014 penalty_2014d penalty_2014 exempt2015 penalty_2015d penalty_2015 penalty 
local cov2015 any_covered2015 covered2015 full_year_2015
local cov2016 any_covered2016_first11 covered2016_first11 full_2016_first11

use "$path/Data/sum_stats_data_prepcode.dta",clear 

* Age-band indicators. All of them evaluate to 0 when age_2017 is missing.
* age27_49 and age50_64 are not in the table variable lists.
gen age0_18 	= (age_2017 >= 0  & age_2017 <= 18)
gen age19_26	= (age_2017 >= 19 & age_2017 <= 26)
gen age27_49 	= (age_2017 >= 27 & age_2017 <= 49)
gen age50_64 	= (age_2017 >= 50 & age_2017 <= 64)
gen age65plus 	= (age_2017 >= 65 & age_2017 != .)

gen fpl_frac138 	= fpl_frac < 1.38
gen fpl_frac138_400 = fpl_frac >= 1.38 & fpl_frac < 4.00
* Stata treats missing as larger than any number, so without the guard a
* missing fpl_frac would be coded 1 here; with it, missing gives 0 as in
* age65plus above.
gen fpl_frac400		= fpl_frac >= 4.00 & fpl_frac != .
* 1 when prep_tin_flag is the empty string.
gen self_prepared = (prep_tin_flag == "")


* Penalty dummy variables: penalty_2014d, penalty_2015d, penalty_2016d.
* Within each tin, copy the first value in sort order (missing sorts last) to
* every other row of that tin.
sort tin penalty_2014d, stable
bysort tin: replace penalty_2014d = penalty_2014d[1] if _n != 1 
* Set to 1 for every row of this sample.
gen penalty_2015d = 1 
sort tin penalty_2016d, stable 
bysort tin: replace penalty_2016d = penalty_2016d[1] if _n != 1 

* Counters: one per row, and one per tin (non-missing on a single row per tin).
gen ind_count = 1 
bysort tin: gen hh_count = 1 if _n == 1

* Income: magi_2015 of the row with primary == 1, copied to all rows of the tin.
gen income = magi_2015 if primary == 1 
sort tin income, stable
bysort tin: replace income = income[1] if _n != 1 
cap gen full_year_2015 = (covered2015 == 12)

* Rebuild the 2016 coverage measures from scratch.
cap drop covered2016_first11
cap drop full_2016_first11
cap drop any_covered2016_first11

gen covered2016_first11 = covered2016 - any_dec
* Keep both indicators missing when covered2016_first11 is missing, exactly as
* the experimental-sample section does on the same data. Without the guard a
* missing value gives full = 0 and any_covered = 1 (missing > 0 is true in
* Stata), which shifts the reported means.
gen full_2016_first11 = (covered2016_first11 == 11) if covered2016_first11 != .
gen any_covered2016_first11 = (covered2016_first11 > 0) if covered2016_first11 != .

* Age bands that appear in the table.
cap gen age27_44 = (age_2017 >= 27 & age_2017 <= 44)
cap gen age45_64 = (age_2017 >= 45 & age_2017 <= 64)

cap program drop sample_stats
program sample_stats, eclass
	* sample_stats varlist
	*
	* For every variable in varlist computes
	*   - the mean over all rows,
	*   - the mean where treatment == 0 and where treatment == 1,
	*   - the p-value of the treatment coefficient from a regression of the
	*     variable on treatment with standard errors clustered on tin.
	* The number of non-missing ind_count and hh_count rows is appended to each
	* row of means.
	*
	* Returns: e(mu), e(mu0), e(mu1)  1 x (k + 2), columns = varlist ind_count hh_count
	*          e(d_p)                 1 x k,       columns = varlist
	* Requires treatment, tin, ind_count and hh_count in memory.
	* No ereturn clear/post is issued: the matrices are added to the e()
	* results left by the last regression, which is what estimates save stores.
	syntax varlist
	* rtab holds r(table) in a temporary matrix, so no global matrix is left
	* behind and no user matrix is overwritten.
	tempname mu mu0 mu1 d_p rtab
	* Obtain averages:
	foreach var of local varlist {
		* All penalty group
		qui sum `var'
		mat `mu' = nullmat(`mu'), r(mean)
		* Control in penalty group
		qui sum `var' if treatment == 0
		mat `mu0' = nullmat(`mu0'), r(mean)
		* Treatment in penalty group
		qui sum `var' if treatment == 1
		mat `mu1' = nullmat(`mu1'), r(mean)
		* Obtain p-value based on clustered SE's (row 4 of r(table), first column = treatment)
		qui reg `var' treatment, cluster(tin)
		mat `rtab' = r(table)
		mat `d_p' = nullmat(`d_p'), `rtab'[4,1]
	}
	* Obtain counts:
	foreach var in ind_count hh_count {
		qui sum `var'
		mat `mu' = nullmat(`mu'), r(N)
		qui sum `var' if treatment == 0
		mat `mu0' = nullmat(`mu0'), r(N)
		qui sum `var' if treatment == 1
		mat `mu1' = nullmat(`mu1'), r(N)
	}
	foreach mat in mu mu0 mu1 {
		mat coln ``mat'' = `varlist' ind_count hh_count //adds variable names
		eret mat `mat' = ``mat'' //returned in e() structure
	}
	mat coln `d_p' = `varlist' //adds variable names
	eret mat d_p = `d_p' //returned in e() structure
end



estimates clear

* Full sample
sample_stats `individual' `household' `local' ///
	`penalty' `cov2015' `cov2016'
estimates save "$path/`name'_full_sample_`date'.ster", replace

* notall16: remove rows whose covered2016 net of any_dec equals 11
* (rows where the difference is missing are kept)
drop if covered2016-any_dec == 11
sample_stats `individual' `household' `local' ///
	`penalty' `cov2015' `cov2016'
estimates save "$path/`name'_notall16_`date'.ster", replace

* notall16 & age45_64: of the rows left, keep only those with age45_64 equal to 1
drop if age45_64 != 1
sample_stats `individual' `household' `local' ///
	`penalty' `cov2015' `cov2016'
estimates save "$path/`name'_age45_64_`date'.ster", replace



log close
